1   package org.apache.lucene.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.nio.charset.StandardCharsets;
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.Iterator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Random;
30  import java.util.concurrent.atomic.AtomicReference;
31  
32  import org.apache.lucene.analysis.MockAnalyzer;
33  import org.apache.lucene.codecs.Codec;
34  import org.apache.lucene.codecs.StoredFieldsFormat;
35  import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
36  import org.apache.lucene.document.Document;
37  import org.apache.lucene.document.DoubleField;
38  import org.apache.lucene.document.Field;
39  import org.apache.lucene.document.Field.Store;
40  import org.apache.lucene.document.FieldType;
41  import org.apache.lucene.document.FieldType.NumericType;
42  import org.apache.lucene.document.FloatField;
43  import org.apache.lucene.document.IntField;
44  import org.apache.lucene.document.LongField;
45  import org.apache.lucene.document.NumericDocValuesField;
46  import org.apache.lucene.document.StoredField;
47  import org.apache.lucene.document.StringField;
48  import org.apache.lucene.document.TextField;
49  import org.apache.lucene.search.IndexSearcher;
50  import org.apache.lucene.search.NumericRangeQuery;
51  import org.apache.lucene.search.Query;
52  import org.apache.lucene.search.TermQuery;
53  import org.apache.lucene.search.TopDocs;
54  import org.apache.lucene.store.Directory;
55  import org.apache.lucene.store.MMapDirectory;
56  import org.apache.lucene.store.MockDirectoryWrapper;
57  import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
58  import org.apache.lucene.util.BytesRef;
59  import org.apache.lucene.util.IOUtils;
60  import org.apache.lucene.util.TestUtil;
61  
62  import com.carrotsearch.randomizedtesting.generators.RandomInts;
63  import com.carrotsearch.randomizedtesting.generators.RandomPicks;
64  import com.carrotsearch.randomizedtesting.generators.RandomStrings;
65  
66  /**
67   * Base class aiming at testing {@link StoredFieldsFormat stored fields formats}.
68   * To test a new format, all you need is to register a new {@link Codec} which
69   * uses it and extend this class and override {@link #getCodec()}.
70   * @lucene.experimental
71   */
72  public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormatTestCase {
73  
74    @Override
75    protected void addRandomFields(Document d) {
76      final int numValues = random().nextInt(3);
77      for (int i = 0; i < numValues; ++i) {
78        d.add(new StoredField("f", TestUtil.randomSimpleString(random(), 100)));
79      }
80    }
81  
82    public void testRandomStoredFields() throws IOException {
83      Directory dir = newDirectory();
84      Random rand = random();
85      RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(TestUtil.nextInt(rand, 5, 20)));
86      //w.w.setNoCFSRatio(0.0);
87      final int docCount = atLeast(200);
88      final int fieldCount = TestUtil.nextInt(rand, 1, 5);
89  
90      final List<Integer> fieldIDs = new ArrayList<>();
91  
92      FieldType customType = new FieldType(TextField.TYPE_STORED);
93      customType.setTokenized(false);
94      Field idField = newField("id", "", customType);
95  
96      for(int i=0;i<fieldCount;i++) {
97        fieldIDs.add(i);
98      }
99  
100     final Map<String,Document> docs = new HashMap<>();
101 
102     if (VERBOSE) {
103       System.out.println("TEST: build index docCount=" + docCount);
104     }
105 
106     FieldType customType2 = new FieldType();
107     customType2.setStored(true);
108     for(int i=0;i<docCount;i++) {
109       Document doc = new Document();
110       doc.add(idField);
111       final String id = ""+i;
112       idField.setStringValue(id);
113       docs.put(id, doc);
114       if (VERBOSE) {
115         System.out.println("TEST: add doc id=" + id);
116       }
117 
118       for(int field: fieldIDs) {
119         final String s;
120         if (rand.nextInt(4) != 3) {
121           s = TestUtil.randomUnicodeString(rand, 1000);
122           doc.add(newField("f"+field, s, customType2));
123         } else {
124           s = null;
125         }
126       }
127       w.addDocument(doc);
128       if (rand.nextInt(50) == 17) {
129         // mixup binding of field name -> Number every so often
130         Collections.shuffle(fieldIDs, random());
131       }
132       if (rand.nextInt(5) == 3 && i > 0) {
133         final String delID = ""+rand.nextInt(i);
134         if (VERBOSE) {
135           System.out.println("TEST: delete doc id=" + delID);
136         }
137         w.deleteDocuments(new Term("id", delID));
138         docs.remove(delID);
139       }
140     }
141 
142     if (VERBOSE) {
143       System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
144     }
145     if (docs.size() > 0) {
146       String[] idsList = docs.keySet().toArray(new String[docs.size()]);
147 
148       for(int x=0;x<2;x++) {
149         IndexReader r = w.getReader();
150         IndexSearcher s = newSearcher(r);
151 
152         if (VERBOSE) {
153           System.out.println("TEST: cycle x=" + x + " r=" + r);
154         }
155 
156         int num = atLeast(1000);
157         for(int iter=0;iter<num;iter++) {
158           String testID = idsList[rand.nextInt(idsList.length)];
159           if (VERBOSE) {
160             System.out.println("TEST: test id=" + testID);
161           }
162           TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
163           assertEquals(1, hits.totalHits);
164           Document doc = r.document(hits.scoreDocs[0].doc);
165           Document docExp = docs.get(testID);
166           for(int i=0;i<fieldCount;i++) {
167             assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i),  doc.get("f"+i));
168           }
169         }
170         r.close();
171         w.forceMerge(1);
172       }
173     }
174     w.close();
175     dir.close();
176   }
177   
178   // LUCENE-1727: make sure doc fields are stored in order
179   public void testStoredFieldsOrder() throws Throwable {
180     Directory d = newDirectory();
181     IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
182     Document doc = new Document();
183 
184     FieldType customType = new FieldType();
185     customType.setStored(true);
186     doc.add(newField("zzz", "a b c", customType));
187     doc.add(newField("aaa", "a b c", customType));
188     doc.add(newField("zzz", "1 2 3", customType));
189     w.addDocument(doc);
190     IndexReader r = w.getReader();
191     Document doc2 = r.document(0);
192     Iterator<IndexableField> it = doc2.getFields().iterator();
193     assertTrue(it.hasNext());
194     Field f = (Field) it.next();
195     assertEquals(f.name(), "zzz");
196     assertEquals(f.stringValue(), "a b c");
197 
198     assertTrue(it.hasNext());
199     f = (Field) it.next();
200     assertEquals(f.name(), "aaa");
201     assertEquals(f.stringValue(), "a b c");
202 
203     assertTrue(it.hasNext());
204     f = (Field) it.next();
205     assertEquals(f.name(), "zzz");
206     assertEquals(f.stringValue(), "1 2 3");
207     assertFalse(it.hasNext());
208     r.close();
209     w.close();
210     d.close();
211   }
212   
213   // LUCENE-1219
214   public void testBinaryFieldOffsetLength() throws IOException {
215     Directory dir = newDirectory();
216     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
217     byte[] b = new byte[50];
218     for(int i=0;i<50;i++)
219       b[i] = (byte) (i+77);
220 
221     Document doc = new Document();
222     Field f = new StoredField("binary", b, 10, 17);
223     byte[] bx = f.binaryValue().bytes;
224     assertTrue(bx != null);
225     assertEquals(50, bx.length);
226     assertEquals(10, f.binaryValue().offset);
227     assertEquals(17, f.binaryValue().length);
228     doc.add(f);
229     w.addDocument(doc);
230     w.close();
231 
232     IndexReader ir = DirectoryReader.open(dir);
233     Document doc2 = ir.document(0);
234     IndexableField f2 = doc2.getField("binary");
235     b = f2.binaryValue().bytes;
236     assertTrue(b != null);
237     assertEquals(17, b.length, 17);
238     assertEquals(87, b[0]);
239     ir.close();
240     dir.close();
241   }
242   
243   public void testNumericField() throws Exception {
244     Directory dir = newDirectory();
245     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
246     final int numDocs = atLeast(500);
247     final Number[] answers = new Number[numDocs];
248     final NumericType[] typeAnswers = new NumericType[numDocs];
249     for(int id=0;id<numDocs;id++) {
250       Document doc = new Document();
251       final Field nf;
252       final Field sf;
253       final Number answer;
254       final NumericType typeAnswer;
255       if (random().nextBoolean()) {
256         // float/double
257         if (random().nextBoolean()) {
258           final float f = random().nextFloat();
259           answer = Float.valueOf(f);
260           nf = new FloatField("nf", f, Field.Store.NO);
261           sf = new StoredField("nf", f);
262           typeAnswer = NumericType.FLOAT;
263         } else {
264           final double d = random().nextDouble();
265           answer = Double.valueOf(d);
266           nf = new DoubleField("nf", d, Field.Store.NO);
267           sf = new StoredField("nf", d);
268           typeAnswer = NumericType.DOUBLE;
269         }
270       } else {
271         // int/long
272         if (random().nextBoolean()) {
273           final int i = random().nextInt();
274           answer = Integer.valueOf(i);
275           nf = new IntField("nf", i, Field.Store.NO);
276           sf = new StoredField("nf", i);
277           typeAnswer = NumericType.INT;
278         } else {
279           final long l = random().nextLong();
280           answer = Long.valueOf(l);
281           nf = new LongField("nf", l, Field.Store.NO);
282           sf = new StoredField("nf", l);
283           typeAnswer = NumericType.LONG;
284         }
285       }
286       doc.add(nf);
287       doc.add(sf);
288       answers[id] = answer;
289       typeAnswers[id] = typeAnswer;
290       FieldType ft = new FieldType(IntField.TYPE_STORED);
291       ft.setNumericPrecisionStep(Integer.MAX_VALUE);
292       doc.add(new IntField("id", id, ft));
293       doc.add(new NumericDocValuesField("id", id));
294       w.addDocument(doc);
295     }
296     final DirectoryReader r = w.getReader();
297     w.close();
298     
299     assertEquals(numDocs, r.numDocs());
300 
301     for(LeafReaderContext ctx : r.leaves()) {
302       final LeafReader sub = ctx.reader();
303       final NumericDocValues ids = DocValues.getNumeric(sub, "id");
304       for(int docID=0;docID<sub.numDocs();docID++) {
305         final Document doc = sub.document(docID);
306         final Field f = (Field) doc.getField("nf");
307         assertTrue("got f=" + f, f instanceof StoredField);
308         assertEquals(answers[(int) ids.get(docID)], f.numericValue());
309       }
310     }
311     r.close();
312     dir.close();
313   }
314 
315   public void testIndexedBit() throws Exception {
316     Directory dir = newDirectory();
317     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
318     Document doc = new Document();
319     FieldType onlyStored = new FieldType();
320     onlyStored.setStored(true);
321     doc.add(new Field("field", "value", onlyStored));
322     doc.add(new StringField("field2", "value", Field.Store.YES));
323     w.addDocument(doc);
324     IndexReader r = w.getReader();
325     w.close();
326     assertEquals(IndexOptions.NONE, r.document(0).getField("field").fieldType().indexOptions());
327     assertNotNull(r.document(0).getField("field2").fieldType().indexOptions());
328     r.close();
329     dir.close();
330   }
331   
332   public void testReadSkip() throws IOException {
333     Directory dir = newDirectory();
334     IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
335     iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
336     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
337     
338     FieldType ft = new FieldType();
339     ft.setStored(true);
340     ft.freeze();
341 
342     final String string = TestUtil.randomSimpleString(random(), 50);
343     final byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
344     final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
345     final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
346     final float f = random().nextFloat();
347     final double d = random().nextDouble();
348 
349     List<Field> fields = Arrays.asList(
350         new Field("bytes", bytes, ft),
351         new Field("string", string, ft),
352         new LongField("long", l, Store.YES),
353         new IntField("int", i, Store.YES),
354         new FloatField("float", f, Store.YES),
355         new DoubleField("double", d, Store.YES)
356     );
357 
358     for (int k = 0; k < 100; ++k) {
359       Document doc = new Document();
360       for (Field fld : fields) {
361         doc.add(fld);
362       }
363       iw.w.addDocument(doc);
364     }
365     iw.commit();
366 
367     final DirectoryReader reader = DirectoryReader.open(dir);
368     final int docID = random().nextInt(100);
369     for (Field fld : fields) {
370       String fldName = fld.name();
371       final Document sDoc = reader.document(docID, Collections.singleton(fldName));
372       final IndexableField sField = sDoc.getField(fldName);
373       if (Field.class.equals(fld.getClass())) {
374         assertEquals(fld.binaryValue(), sField.binaryValue());
375         assertEquals(fld.stringValue(), sField.stringValue());
376       } else {
377         assertEquals(fld.numericValue(), sField.numericValue());
378       }
379     }
380     reader.close();
381     iw.close();
382     dir.close();
383   }
384   
385   public void testEmptyDocs() throws IOException {
386     Directory dir = newDirectory();
387     IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
388     iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
389     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
390     
391     // make sure that the fact that documents might be empty is not a problem
392     final Document emptyDoc = new Document();
393     final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
394     for (int i = 0; i < numDocs; ++i) {
395       iw.addDocument(emptyDoc);
396     }
397     iw.commit();
398     final DirectoryReader rd = DirectoryReader.open(dir);
399     for (int i = 0; i < numDocs; ++i) {
400       final Document doc = rd.document(i);
401       assertNotNull(doc);
402       assertTrue(doc.getFields().isEmpty());
403     }
404     rd.close();
405     
406     iw.close();
407     dir.close();
408   }
409   
410   public void testConcurrentReads() throws Exception {
411     Directory dir = newDirectory();
412     IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
413     iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
414     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
415     
416     // make sure the readers are properly cloned
417     final Document doc = new Document();
418     final Field field = new StringField("fld", "", Store.YES);
419     doc.add(field);
420     final int numDocs = atLeast(1000);
421     for (int i = 0; i < numDocs; ++i) {
422       field.setStringValue("" + i);
423       iw.addDocument(doc);
424     }
425     iw.commit();
426 
427     final DirectoryReader rd = DirectoryReader.open(dir);
428     final IndexSearcher searcher = new IndexSearcher(rd);
429     final int concurrentReads = atLeast(5);
430     final int readsPerThread = atLeast(50);
431     final List<Thread> readThreads = new ArrayList<>();
432     final AtomicReference<Exception> ex = new AtomicReference<>();
433     for (int i = 0; i < concurrentReads; ++i) {
434       readThreads.add(new Thread() {
435 
436         int[] queries;
437 
438         {
439           queries = new int[readsPerThread];
440           for (int i = 0; i < queries.length; ++i) {
441             queries[i] = random().nextInt(numDocs);
442           }
443         }
444 
445         @Override
446         public void run() {
447           for (int q : queries) {
448             final Query query = new TermQuery(new Term("fld", "" + q));
449             try {
450               final TopDocs topDocs = searcher.search(query, 1);
451               if (topDocs.totalHits != 1) {
452                 throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits);
453               }
454               final Document sdoc = rd.document(topDocs.scoreDocs[0].doc);
455               if (sdoc == null || sdoc.get("fld") == null) {
456                 throw new IllegalStateException("Could not find document " + q);
457               }
458               if (!Integer.toString(q).equals(sdoc.get("fld"))) {
459                 throw new IllegalStateException("Expected " + q + ", but got " + sdoc.get("fld"));
460               }
461             } catch (Exception e) {
462               ex.compareAndSet(null, e);
463             }
464           }
465         }
466       });
467     }
468     for (Thread thread : readThreads) {
469       thread.start();
470     }
471     for (Thread thread : readThreads) {
472       thread.join();
473     }
474     rd.close();
475     if (ex.get() != null) {
476       throw ex.get();
477     }
478     
479     iw.close();
480     dir.close();
481   }
482   
483   private byte[] randomByteArray(int length, int max) {
484     final byte[] result = new byte[length];
485     for (int i = 0; i < length; ++i) {
486       result[i] = (byte) random().nextInt(max);
487     }
488     return result;
489   }
490   
491   public void testWriteReadMerge() throws IOException {
492     // get another codec, other than the default: so we are merging segments across different codecs
493     final Codec otherCodec;
494     if ("SimpleText".equals(Codec.getDefault().getName())) {
495       otherCodec = TestUtil.getDefaultCodec();
496     } else {
497       otherCodec = new SimpleTextCodec();
498     }
499     Directory dir = newDirectory();
500     IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
501     iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
502     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
503     
504     final int docCount = atLeast(200);
505     final byte[][][] data = new byte [docCount][][];
506     for (int i = 0; i < docCount; ++i) {
507       final int fieldCount = rarely()
508           ? RandomInts.randomIntBetween(random(), 1, 500)
509           : RandomInts.randomIntBetween(random(), 1, 5);
510       data[i] = new byte[fieldCount][];
511       for (int j = 0; j < fieldCount; ++j) {
512         final int length = rarely()
513             ? random().nextInt(1000)
514             : random().nextInt(10);
515         final int max = rarely() ? 256 : 2;
516         data[i][j] = randomByteArray(length, max);
517       }
518     }
519 
520     final FieldType type = new FieldType(StringField.TYPE_STORED);
521     type.setIndexOptions(IndexOptions.NONE);
522     type.freeze();
523     IntField id = new IntField("id", 0, Store.YES);
524     for (int i = 0; i < data.length; ++i) {
525       Document doc = new Document();
526       doc.add(id);
527       id.setIntValue(i);
528       for (int j = 0; j < data[i].length; ++j) {
529         Field f = new Field("bytes" + j, data[i][j], type);
530         doc.add(f);
531       }
532       iw.w.addDocument(doc);
533       if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
534         iw.w.close();
535         IndexWriterConfig iwConfNew = newIndexWriterConfig(new MockAnalyzer(random()));
536         // test merging against a non-compressing codec
537         if (iwConf.getCodec() == otherCodec) {
538           iwConfNew.setCodec(Codec.getDefault());
539         } else {
540           iwConfNew.setCodec(otherCodec);
541         }
542         iwConf = iwConfNew;
543         iw = new RandomIndexWriter(random(), dir, iwConf);
544       }
545     }
546 
547     for (int i = 0; i < 10; ++i) {
548       final int min = random().nextInt(data.length);
549       final int max = min + random().nextInt(20);
550       iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
551     }
552 
553     iw.forceMerge(2); // force merges with deletions
554 
555     iw.commit();
556 
557     final DirectoryReader ir = DirectoryReader.open(dir);
558     assertTrue(ir.numDocs() > 0);
559     int numDocs = 0;
560     for (int i = 0; i < ir.maxDoc(); ++i) {
561       final Document doc = ir.document(i);
562       if (doc == null) {
563         continue;
564       }
565       ++ numDocs;
566       final int docId = doc.getField("id").numericValue().intValue();
567       assertEquals(data[docId].length + 1, doc.getFields().size());
568       for (int j = 0; j < data[docId].length; ++j) {
569         final byte[] arr = data[docId][j];
570         final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
571         final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
572         assertArrayEquals(arr, arr2);
573       }
574     }
575     assertTrue(ir.numDocs() <= numDocs);
576     ir.close();
577 
578     iw.deleteAll();
579     iw.commit();
580     iw.forceMerge(1);
581     
582     iw.close();
583     dir.close();
584   }
585 
586   /** A dummy filter reader that reverse the order of documents in stored fields. */
587   private static class DummyFilterLeafReader extends FilterLeafReader {
588 
589     public DummyFilterLeafReader(LeafReader in) {
590       super(in);
591     }
592 
593     @Override
594     public void document(int docID, StoredFieldVisitor visitor) throws IOException {
595       super.document(maxDoc() - 1 - docID, visitor);
596     }
597 
598   }
599 
600   private static class DummyFilterDirectoryReader extends FilterDirectoryReader {
601 
602     public DummyFilterDirectoryReader(DirectoryReader in) throws IOException {
603       super(in, new SubReaderWrapper() {
604         @Override
605         public LeafReader wrap(LeafReader reader) {
606           return new DummyFilterLeafReader(reader);
607         }
608       });
609     }
610 
611     @Override
612     protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
613       return new DummyFilterDirectoryReader(in);
614     }
615     
616   }
617 
618   public void testMergeFilterReader() throws IOException {
619     Directory dir = newDirectory();
620     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
621     final int numDocs = atLeast(200);
622     final String[] stringValues = new String[10];
623     for (int i = 0; i < stringValues.length; ++i) {
624       stringValues[i] = RandomStrings.randomRealisticUnicodeOfLength(random(), 10);
625     }
626     Document[] docs = new Document[numDocs];
627     for (int i = 0; i < numDocs; ++i) {
628       Document doc = new Document();
629       doc.add(new StringField("to_delete", random().nextBoolean() ? "yes" : "no", Store.NO));
630       doc.add(new StoredField("id", i));
631       doc.add(new StoredField("i", random().nextInt(50)));
632       doc.add(new StoredField("l", random().nextLong()));
633       doc.add(new StoredField("d", random().nextDouble()));
634       doc.add(new StoredField("f", random().nextFloat()));
635       doc.add(new StoredField("s", RandomPicks.randomFrom(random(), stringValues)));
636       doc.add(new StoredField("b", new BytesRef(RandomPicks.randomFrom(random(), stringValues))));
637       docs[i] = doc;
638       w.addDocument(doc);
639     }
640     if (random().nextBoolean()) {
641       w.deleteDocuments(new Term("to_delete", "yes"));
642     }
643     w.commit();
644     w.close();
645     
646     DirectoryReader reader = new DummyFilterDirectoryReader(DirectoryReader.open(dir));
647     
648     Directory dir2 = newDirectory();
649     w = new RandomIndexWriter(random(), dir2);
650     TestUtil.addIndexesSlowly(w.w, reader);
651     reader.close();
652     dir.close();
653 
654     reader = w.getReader();
655     for (int i = 0; i < reader.maxDoc(); ++i) {
656       final Document doc = reader.document(i);
657       final int id = doc.getField("id").numericValue().intValue();
658       final Document expected = docs[id];
659       assertEquals(expected.get("s"), doc.get("s"));
660       assertEquals(expected.getField("i").numericValue(), doc.getField("i").numericValue());
661       assertEquals(expected.getField("l").numericValue(), doc.getField("l").numericValue());
662       assertEquals(expected.getField("d").numericValue(), doc.getField("d").numericValue());
663       assertEquals(expected.getField("f").numericValue(), doc.getField("f").numericValue());
664       assertEquals(expected.getField("b").binaryValue(), doc.getField("b").binaryValue());
665     }
666 
667     reader.close();
668     w.close();
669     TestUtil.checkIndex(dir2);
670     dir2.close();
671   }
672 
673   @Nightly
674   public void testBigDocuments() throws IOException {
675     // "big" as "much bigger than the chunk size"
676     // for this test we force a FS dir
677     // we can't just use newFSDirectory, because this test doesn't really index anything.
678     // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
679     Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
680     IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
681     iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
682     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
683 
684     if (dir instanceof MockDirectoryWrapper) {
685       ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
686     }
687 
688     final Document emptyDoc = new Document(); // emptyDoc
689     final Document bigDoc1 = new Document(); // lot of small fields
690     final Document bigDoc2 = new Document(); // 1 very big field
691 
692     final Field idField = new StringField("id", "", Store.NO);
693     emptyDoc.add(idField);
694     bigDoc1.add(idField);
695     bigDoc2.add(idField);
696 
697     final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
698     onlyStored.setIndexOptions(IndexOptions.NONE);
699 
700     final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
701     final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
702     for (int i = 0; i < numFields; ++i) {
703       bigDoc1.add(smallField);
704     }
705 
706     final Field bigField = new Field("fld", randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
707     bigDoc2.add(bigField);
708 
709     final int numDocs = atLeast(5);
710     final Document[] docs = new Document[numDocs];
711     for (int i = 0; i < numDocs; ++i) {
712       docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
713     }
714     for (int i = 0; i < numDocs; ++i) {
715       idField.setStringValue("" + i);
716       iw.addDocument(docs[i]);
717       if (random().nextInt(numDocs) == 0) {
718         iw.commit();
719       }
720     }
721     iw.commit();
722     iw.forceMerge(1); // look at what happens when big docs are merged
723     final DirectoryReader rd = DirectoryReader.open(dir);
724     final IndexSearcher searcher = new IndexSearcher(rd);
725     for (int i = 0; i < numDocs; ++i) {
726       final Query query = new TermQuery(new Term("id", "" + i));
727       final TopDocs topDocs = searcher.search(query, 1);
728       assertEquals("" + i, 1, topDocs.totalHits);
729       final Document doc = rd.document(topDocs.scoreDocs[0].doc);
730       assertNotNull(doc);
731       final IndexableField[] fieldValues = doc.getFields("fld");
732       assertEquals(docs[i].getFields("fld").length, fieldValues.length);
733       if (fieldValues.length > 0) {
734         assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
735       }
736     }
737     rd.close();
738     iw.close();
739     dir.close();
740   }
741 
742   public void testBulkMergeWithDeletes() throws IOException {
743     final int numDocs = atLeast(200);
744     Directory dir = newDirectory();
745     RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
746     for (int i = 0; i < numDocs; ++i) {
747       Document doc = new Document();
748       doc.add(new StringField("id", Integer.toString(i), Store.YES));
749       doc.add(new StoredField("f", TestUtil.randomSimpleString(random())));
750       w.addDocument(doc);
751     }
752     final int deleteCount = TestUtil.nextInt(random(), 5, numDocs);
753     for (int i = 0; i < deleteCount; ++i) {
754       final int id = random().nextInt(numDocs);
755       w.deleteDocuments(new Term("id", Integer.toString(id)));
756     }
757     w.commit();
758     w.close();
759     w = new RandomIndexWriter(random(), dir);
760     w.forceMerge(TestUtil.nextInt(random(), 1, 3));
761     w.commit();
762     w.close();
763     TestUtil.checkIndex(dir);
764     dir.close();
765   }
766 
767   /** mix up field numbers, merge, and check that data is correct */
768   public void testMismatchedFields() throws Exception {
769     Directory dirs[] = new Directory[10];
770     for (int i = 0; i < dirs.length; i++) {
771       Directory dir = newDirectory();
772       IndexWriterConfig iwc = new IndexWriterConfig(null);
773       IndexWriter iw = new IndexWriter(dir, iwc);
774       Document doc = new Document();
775       for (int j = 0; j < 10; j++) {
776         // add fields where name=value (e.g. 3=3) so we can detect if stuff gets screwed up.
777         doc.add(new StringField(Integer.toString(j), Integer.toString(j), Field.Store.YES));
778       }
779       for (int j = 0; j < 10; j++) {
780         iw.addDocument(doc);
781       }
782       
783       DirectoryReader reader = DirectoryReader.open(iw, true);
784       // mix up fields explicitly
785       if (random().nextBoolean()) {
786         reader = new MismatchedDirectoryReader(reader, random());
787       }
788       dirs[i] = newDirectory();
789       IndexWriter adder = new IndexWriter(dirs[i], new IndexWriterConfig(null));
790       TestUtil.addIndexesSlowly(adder, reader);
791       adder.commit();
792       adder.close();
793       
794       IOUtils.close(reader, iw, dir);
795     }
796     
797     Directory everything = newDirectory();
798     IndexWriter iw = new IndexWriter(everything, new IndexWriterConfig(null));
799     iw.addIndexes(dirs);
800     iw.forceMerge(1);
801     
802     LeafReader ir = getOnlySegmentReader(DirectoryReader.open(iw, true));
803     for (int i = 0; i < ir.maxDoc(); i++) {
804       Document doc = ir.document(i);
805       assertEquals(10, doc.getFields().size());
806       for (int j = 0; j < 10; j++) {
807         assertEquals(Integer.toString(j), doc.get(Integer.toString(j)));
808       }
809     }
810 
811     IOUtils.close(iw, ir, everything);
812     IOUtils.close(dirs);
813   }
814 }